In [1]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

Using Singular Value Decomposition

wikipedia link

important notes:

  • the mean have to be removed ahead of the decomposition. the decomposition transformation is not affine, which means that the mean will affect the decomposition.

  • Method one is to eigendecompose the covariant matrix X^T X.

  • Method Two is to singular value decompose the design matrix (data) X.

  • the numpy svd function has a bunch of conventions that one need to pay attention to.


In [37]:
# numpy svg example case:

# the numpy doc string says: "the V here is V.H. and the s here is only the diagnal terms"
example_data = np.random.multivariate_normal([1, 1.2], [[0.4, 1], [0.025, 0.2]], size=10000)

U, s, V = np.linalg.svd(example_data)

mat_sigma = np.zeros((10000, 2))
mat_sigma[:2, :2] = np.diag(s)
recovered = U.dot(mat_sigma).dot(V)

np.allclose(example_data, recovered)


Out[37]:
True

In [50]:
fig = plt.figure(figsize=(9, 3))
ax1 = fig.add_subplot(131, aspect="equal")
ax2 = fig.add_subplot(132, aspect="equal")
ax3 = fig.add_subplot(133, aspect="equal")



In [90]:
data = np.random.multivariate_normal([1, 1.2], [[0.4, 1], [0.025, 0.2]], size=10000)
ax1.clear()
ax1.scatter(data[:, 0], data[:, 1], s=0.5, lw='0', color="red", alpha="0.4")
ax1.set_xlim(-6, 6)
ax1.set_ylim(-6, 6)

# now we can try to find the principle components
# First method: using eigenvalue decomposition of the covariance matrix X^T X
centered = data - data.mean(axis=0)
mat_cov = np.dot(centered.T, centered)
val, vec = np.linalg.eig(mat_cov)


T = data.dot(vec)

ax2.clear()
ax2.scatter(T[:, 1], 
            T[:, 0], lw='0', color="#23aaff", alpha="0.3")
ax2.set_xlim(-6, 6)
ax2.set_ylim(-6, 6);

In [91]:
# Now use SVD to find the principle components
U, s, V = np.linalg.svd(centered)

mat_sigma = np.zeros((10000, 2))
mat_sigma[:2, :2] = np.diag(s)
aligned = U.dot(mat_sigma)

ax3.clear()
ax3.scatter(aligned[:, 0], aligned[:, 1], lw='0', color='#23aaff', alpha='0.3')


Out[91]:
<matplotlib.collections.PathCollection at 0x109834a58>

In [89]:
# the singular value decomposition should equal the square root of the eigenvalues.
print(val[::-1], s*s)


[ 10992.57341112    499.90916695] [ 10992.57341112    499.90916695]

In [52]:
# Now try to chose k
retained_variance = 1 - s[:1].sum() / s.sum()
print("the variation that is loss is only {}!!".format(retained_variance))


the variation that is loss is only 0.15922127550788023!!

In [78]:


In [ ]: